##Rashmi Raju Koparde -230322 ##Sathya Sudha Murugan-229638 ##Shweta Bhat - 229530
##Loading libraries.
library(readxl)
library(data.table)
library(plyr)
library(ggbiplot)
## Loading required package: ggplot2
## Loading required package: scales
## Loading required package: grid
library(ggfortify)
##
## Attaching package: 'ggfortify'
## The following object is masked from 'package:ggbiplot':
##
## ggbiplot
library(embed)
## Loading required package: recipes
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
library(tidytext)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(forcats)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.5 ✓ purrr 0.3.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x plotly::arrange() masks dplyr::arrange(), plyr::arrange()
## x dplyr::between() masks data.table::between()
## x readr::col_factor() masks scales::col_factor()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x purrr::discard() masks scales::discard()
## x dplyr::failwith() masks plyr::failwith()
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::first() masks data.table::first()
## x stringr::fixed() masks recipes::fixed()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x plotly::mutate() masks dplyr::mutate(), plyr::mutate()
## x plotly::rename() masks dplyr::rename(), plyr::rename()
## x plotly::summarise() masks dplyr::summarise(), plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
## x purrr::transpose() masks data.table::transpose()
library(ggplot2)
library(Rtsne)
library(umap)
##Loading data.
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## pathology = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
##Applying contour on PCA.
# Numeric feature table reused by the UMAP and t-SNE sections below: every
# column of `dataset` except the `pathology` label.
# NOTE(review): the original selected columns by position (dropping column 3);
# selecting by name assumes column 3 is `pathology` -- confirm against the
# loaded data.
df <- dataset[, names(dataset) != "pathology", drop = FALSE]

# PCA recipe: `pathology` is kept as an id column, every remaining column is
# normalized and then fed to the PCA step.
pca_gucci <- recipe(~., data = dataset) %>%
  update_role(pathology, new_role = "id") %>%
  step_normalize(all_predictors()) %>%
  step_pca(all_predictors())
pca_prep <- prep(pca_gucci)

# Step 2 of the recipe is step_pca (step 1 is step_normalize); its tidy()
# output is a long table with `terms`, `value`, `component` and `id` columns.
tidied_pca <- tidy(pca_prep, 2)

# Reshape to one column per component. The earlier filter/group_by/mutate
# pipeline was removed: its result was overwritten before ever being used.
x <- pivot_wider(tidied_pca, names_from = component, values_from = value)
x$terms <- NULL
x$id <- NULL

# NOTE(review): this contours the tidied PCA table (one row per input term),
# whereas the UMAP and t-SNE contour plots below use per-sample embeddings --
# confirm this is the intended input.
#
# The colour-bar title is set on the trace itself. With a constant
# `color = I("black")` plotly's colorbar() helper finds nothing to modify and
# only emitted "Didn't find a colorbar to modify.", so that call was a no-op.
fig <- plot_ly(
  x,
  x = ~PC1, y = ~PC2, z = ~PC3,
  type = "contour",
  width = 600, height = 500,
  contours = list(showlabels = TRUE),
  color = I("black"),
  colorbar = list(title = list(text = "PC3"))
)
fig
##Applying scatter plot on PCA.
# Scatter plot of the first two principal components from the prepped
# recipe, coloured by pathology.
# The original passed PC3 as a third positional argument to aes(); that
# creates an unnamed aesthetic geom_point() cannot use in a 2-D plot, so it
# has been dropped.
juice(pca_prep) %>%
  ggplot(aes(PC1, PC2, fill = pathology)) +
  geom_point(aes(color = pathology), alpha = 0.7, size = 2) +
  # stat_ellipse(geom = "polygon", col = "black", alpha = 0.1) +
  # Three manual colours: assumes `pathology` has three levels -- TODO confirm.
  scale_color_manual(values = c("#31a354", "#2b8cbe", "#fc9272"))
##Applying UMAP on dataset and scatter plot.
# UMAP recipe, built one step at a time: `pathology` becomes an id column,
# the remaining columns are normalized, then embedded with step_umap().
umap_rec <- recipe(~., data = dataset)
umap_rec <- update_role(umap_rec, pathology, new_role = "id")
umap_rec <- step_normalize(umap_rec, all_predictors())
umap_rec <- step_umap(umap_rec, all_predictors())
umap_prep <- prep(umap_rec)

# Scatter plot of the two UMAP coordinates, coloured by pathology.
ggplot(
  juice(umap_prep),
  aes(x = umap_1, y = umap_2, fill = pathology)
) +
  geom_point(aes(color = pathology), alpha = 0.7, size = 2) +
  # stat_ellipse(geom = "polygon", col = "black", alpha = 0.1) +
  scale_color_manual(values = c("#31a354", "#2b8cbe", "#fc9272"))
##Applying contour on UMAP.
# Three-component UMAP of the numeric feature table `df`, drawn as a contour
# plot with the third coordinate as z.
data.umap <- umap(df, n_components = 3)
dims_umap <- data.umap$layout
colnames(dims_umap) <- c("UMAP1", "UMAP2", "UMAP3")
df_out <- as.data.frame(dims_umap)

# The colour-bar title is set on the trace itself. With a constant
# `color = I("black")` plotly's colorbar() helper finds nothing to modify and
# only emitted "Didn't find a colorbar to modify.", so that call was a no-op.
fig <- plot_ly(
  df_out,
  x = df_out$UMAP1, y = df_out$UMAP2, z = df_out$UMAP3,
  type = "contour",
  width = 600, height = 500,
  contours = list(showlabels = TRUE),
  color = I("black"),
  colorbar = list(title = list(text = "UMAP"))
)
fig
##Applying t-SNE on dataset and Contour on T-SNE.
# Baseline 3-D t-SNE embedding of the numeric feature table.
# t-SNE is stochastic; seed the RNG so this run (and the plots built from it)
# is repeatable, matching the seeded hyper-parameter section further down.
set.seed(1)
dat.active <- df
tsne_out <- Rtsne(
  dat.active,
  dims = 3,
  perplexity = 30,
  verbose = TRUE,
  max_iter = 500
)
## Performing PCA
## Read the 122 x 50 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 3, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.02 seconds (sparsity = 0.875840)!
## Learning embedding...
## Iteration 50: error is 55.652561 (50 iterations in 0.05 seconds)
## Iteration 100: error is 56.284607 (50 iterations in 0.03 seconds)
## Iteration 150: error is 58.561825 (50 iterations in 0.04 seconds)
## Iteration 200: error is 56.004091 (50 iterations in 0.03 seconds)
## Iteration 250: error is 57.035669 (50 iterations in 0.04 seconds)
## Iteration 300: error is 1.018256 (50 iterations in 0.02 seconds)
## Iteration 350: error is 0.527153 (50 iterations in 0.03 seconds)
## Iteration 400: error is 0.309658 (50 iterations in 0.02 seconds)
## Iteration 450: error is 0.268569 (50 iterations in 0.02 seconds)
## Iteration 500: error is 0.257745 (50 iterations in 0.02 seconds)
## Fitting performed in 0.29 seconds.
# Contour plot of the t-SNE embedding: first two coordinates on the axes,
# third coordinate as z. Only the coordinates are needed here.
t <- data.frame(
  tsne1 = tsne_out$Y[, 1],
  tsne2 = tsne_out$Y[, 2],
  tsne3 = tsne_out$Y[, 3]
)

# The colour-bar title is set on the trace itself. With a constant
# `color = I("black")` plotly's colorbar() helper finds nothing to modify and
# only emitted "Didn't find a colorbar to modify.", so that call was a no-op.
fig <- plot_ly(
  t,
  x = t$tsne1, y = t$tsne2, z = t$tsne3,
  type = "contour",
  width = 600, height = 500,
  contours = list(showlabels = TRUE),
  color = I("black"),
  colorbar = list(title = list(text = "Tsne3"))
)
fig
##Scatter Plot on tsne.
# t-SNE coordinates together with the pathology label of each row.
t <- data.frame(
  tsne1 = tsne_out$Y[, 1],
  tsne2 = tsne_out$Y[, 2],
  tsne3 = tsne_out$Y[, 3],
  pathology = dataset$pathology
)

# Scatter plot of the first two t-SNE coordinates, coloured by pathology.
ggplot(t, aes(x = tsne1, y = tsne2, fill = pathology)) +
  geom_point(aes(color = pathology), alpha = 0.7, size = 2) +
  # stat_ellipse(geom = "polygon", col = "black", alpha = 0.1) +
  scale_color_manual(values = c("#31a354", "#2b8cbe", "#fc9272"))
##Hyperparameter tuning.
# t-SNE hyper-parameter comparison: same iteration count and learning rate,
# one run per perplexity value in `perpl`.
set.seed(1) # for reproducibility
iterations <- 500
learning <- 200
# Perplexities to compare. The earlier scalar `perpl = 30` was dropped: it
# was overwritten by this vector before ever being read.
perpl <- c(30, 35)

# First run: perplexity perpl[1].
i <- 1
tsne_out <- Rtsne(
  df,
  dims = 3,
  perplexity = perpl[i],
  verbose = TRUE,
  max_iter = iterations,
  eta = learning
)
## Performing PCA
## Read the 122 x 50 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 3, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.02 seconds (sparsity = 0.875840)!
## Learning embedding...
## Iteration 50: error is 51.737993 (50 iterations in 0.03 seconds)
## Iteration 100: error is 55.399837 (50 iterations in 0.04 seconds)
## Iteration 150: error is 53.816672 (50 iterations in 0.03 seconds)
## Iteration 200: error is 55.667034 (50 iterations in 0.03 seconds)
## Iteration 250: error is 54.217633 (50 iterations in 0.04 seconds)
## Iteration 300: error is 1.014411 (50 iterations in 0.02 seconds)
## Iteration 350: error is 0.450896 (50 iterations in 0.02 seconds)
## Iteration 400: error is 0.348436 (50 iterations in 0.02 seconds)
## Iteration 450: error is 0.282917 (50 iterations in 0.02 seconds)
## Iteration 500: error is 0.264900 (50 iterations in 0.02 seconds)
## Fitting performed in 0.28 seconds.
# Contour plot of the current t-SNE run: first two coordinates on the axes,
# third coordinate as z.
t <- data.frame(
  tsne1 = tsne_out$Y[, 1],
  tsne2 = tsne_out$Y[, 2],
  tsne3 = tsne_out$Y[, 3]
)

# The colour-bar title is set on the trace itself. With a constant
# `color = I("black")` plotly's colorbar() helper finds nothing to modify and
# only emitted "Didn't find a colorbar to modify.", so that call was a no-op.
fig <- plot_ly(
  t,
  x = t$tsne1, y = t$tsne2, z = t$tsne3,
  type = "contour",
  width = 600, height = 500,
  contours = list(showlabels = TRUE),
  color = I("black"),
  colorbar = list(title = list(text = "Tsne3"))
)
fig
# Next run: advance to the following perplexity value and refit with the
# same iteration count and learning rate.
i <- i + 1
tsne_out <- Rtsne(
  df,
  dims = 3,
  perplexity = perpl[i],
  verbose = TRUE,
  max_iter = iterations,
  eta = learning
)
## Performing PCA
## Read the 122 x 50 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 3, perplexity = 35.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.02 seconds (sparsity = 0.961973)!
## Learning embedding...
## Iteration 50: error is 51.148279 (50 iterations in 0.03 seconds)
## Iteration 100: error is 54.401081 (50 iterations in 0.04 seconds)
## Iteration 150: error is 56.144805 (50 iterations in 0.06 seconds)
## Iteration 200: error is 55.124023 (50 iterations in 0.07 seconds)
## Iteration 250: error is 49.930915 (50 iterations in 0.07 seconds)
## Iteration 300: error is 1.135920 (50 iterations in 0.03 seconds)
## Iteration 350: error is 0.522236 (50 iterations in 0.02 seconds)
## Iteration 400: error is 0.299130 (50 iterations in 0.02 seconds)
## Iteration 450: error is 0.245188 (50 iterations in 0.02 seconds)
## Iteration 500: error is 0.227364 (50 iterations in 0.02 seconds)
## Fitting performed in 0.39 seconds.
# Contour plot of the current t-SNE run: first two coordinates on the axes,
# third coordinate as z.
t <- data.frame(
  tsne1 = tsne_out$Y[, 1],
  tsne2 = tsne_out$Y[, 2],
  tsne3 = tsne_out$Y[, 3]
)

# The colour-bar title is set on the trace itself. With a constant
# `color = I("black")` plotly's colorbar() helper finds nothing to modify and
# only emitted "Didn't find a colorbar to modify.", so that call was a no-op.
fig <- plot_ly(
  t,
  x = t$tsne1, y = t$tsne2, z = t$tsne3,
  type = "contour",
  width = 600, height = 500,
  contours = list(showlabels = TRUE),
  color = I("black"),
  colorbar = list(title = list(text = "Tsne3"))
)
fig
##References:
#https://cran.r-project.org/web/packages/ggfortify/vignettes/plot_pca.html
#https://juliasilge.com/blog/cocktail-recipes-umap/
#https://jkzorz.github.io/2020/02/29/contour-plots.html
#https://www.r-statistics.com/2016/07/using-2d-contour-plots-within-ggplot2-to-visualize-relationships-between-three-variables/